Preperation of the capelin (Mallotus villosus) data obtained from OBIS. This database shows the global occurance of capelin.
A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. The code herein is unlikely to be elegant and there is likely more efficient ways of running the code.
Built with ‘r getRversion()’.
You can load them using the following code which uses a function called ipak. Note this function checks to see if the packages are installed first. Th
packages <- c("rworldmap", "plyr")
source("../src/ipak.R")
ipak(packages)
rworldmap plyr
TRUE TRUE
suppressMessages(ipak)
function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
Load the raw data, then check use head() to check it looks ok, then names() to see all the column names
rawdata <- read.csv("../data/bio/capelin_obis_raw.csv")
head(rawdata)
names(rawdata)
[1] "id" "valid_id"
[3] "resource_id" "position_id"
[5] "latitude" "longitude"
[7] "datecollected" "depth"
[9] "qc" "institutioncode"
[11] "collectioncode" "catalognumber"
[13] "yearcollected" "individualcount"
[15] "resname" "tname"
[17] "tauthor" "worms_id"
[19] "scientificnameid" "storedpath"
[21] "originalscientificname" "geom"
[23] "phylum" "class"
[25] "order" "family"
[27] "genus" "species"
[29] "phylum_id" "class_id"
[31] "order_id" "family_id"
[33] "genus_id" "species_id"
[35] "group_id" "node_id"
[37] "id.1" "acceptednameusage"
[39] "acceptednameusageid" "accessrights"
[41] "associatedmedia" "associatedoccurrences"
[43] "associatedreferences" "associatedsequences"
[45] "associatedtaxa" "basisofrecord"
[47] "bed" "behavior"
[49] "bibliographiccitation" "collectionid"
[51] "continent" "coordinateprecision"
[53] "coordinateuncertaintyinmeters" "countrycode"
[55] "county" "datageneralizations"
[57] "datasetid" "dateidentified"
[59] "disposition" "dynamicproperties"
[61] "establishmentmeans" "eventdate"
[63] "eventid" "eventremarks"
[65] "eventtime" "fieldnotes"
[67] "fieldnumber" "footprintspatialfit"
[69] "footprintsrs" "footprintwkt"
[71] "geodeticdatum" "group"
[73] "habitat" "higherclassification"
[75] "highergeography" "highergeographyid"
[77] "identificationid" "identificationqualifier"
[79] "identificationreferences" "identificationremarks"
[81] "identificationverificationstatus" "identifiedby"
[83] "individualid" "informationwithheld"
[85] "infraspecificepithet" "institutionid"
[87] "island" "islandgroup"
[89] "language" "lifestage"
[91] "locality" "locationaccordingto"
[93] "locationid" "locationremarks"
[95] "materialsampleid" "maximumdepthinmeters"
[97] "minimumdepthinmeters" "modified"
[99] "municipality" "nameaccordingto"
[101] "nameaccordingtoid" "namepublishedin"
[103] "namepublishedinid" "occurrencedetails"
[105] "occurrenceid" "occurrenceremarks"
[107] "occurrencestatus" "originalnameusage"
[109] "originalnameusageid" "othercatalognumbers"
[111] "ownerinstitutioncode" "recordedby"
[113] "recordnumber" "references"
[115] "reproductivecondition" "rights"
[117] "rightsholder" "samplesize"
[119] "sex" "source"
[121] "specificepithet" "stateprovince"
[123] "taxonconceptid" "taxonid"
[125] "taxonomicstatus" "taxonrank"
[127] "taxonremarks" "type"
[129] "typestatus" "vernacularname"
[131] "waterbody"
following Darwin Core rename the latitude and longitude columns to decimalLatitude and decimalLongitude respectively
tidydata <- rawdata
names(tidydata)[names(tidydata)=="latitude"] <- "decimalLatitude"
names(tidydata)[names(tidydata)=="longitude"] <- "decimalLongitude"
head(tidydata)
The dataset is global and needs to be trimmed to contain just the area of interest. Plot all the occurance points
library(rworldmap)
### Welcome to rworldmap ###
For a short introduction type : vignette('rworldmap')
map <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map, xlim = c(-180, -44), ylim =c(0,80), asp = 1) #the x and y lim are the long-lat bounds of the map
points(tidydata$decimalLongitude, tidydata$decimalLatitude) #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
now extract just the Atlantic Canada data. The lon-lat study area (which captures the whole Atlantic Canadian EEZ up to the north of Resolution Island in the Hudson Strait/ Davis Strait) is -71.0,-43.0,62.0,38.0
tidydata <- tidydata[tidydata$decimalLongitude > -71, ]
tidydata <- tidydata[tidydata$decimalLongitude < -43, ]
tidydata <- tidydata[tidydata$decimalLatitude > 38, ]
plot(map, xlim = c(-180, -44), ylim =c(0,80), asp = 1)
points(tidydata$decimalLongitude, tidydata$decimalLatitude)
ok looks good…save what you have so far to a .csv
write.csv(tidydata, file = "../data/bio/capelin_obis_tidy.csv", row.names = FALSE)
So there are many columns that you don’t need.. you want to keep id decimalLatitude decimalLongitude datecollected institutioncode individualcount resname originalscientificname collectioncode (this contains some info that might help with finding out gear used in the survey)
tidydata <- subset(tidydata, select = c(id, decimalLatitude, decimalLongitude, datecollected, institutioncode, individualcount, resname, originalscientificname, collectioncode))
head(tidydata)
make columns for the years and month based on the datecollected column
tidydata$year <- format(as.Date(tidydata$datecollected), "%Y") #created a new column called year
tidydata$month <- format(as.Date(tidydata$datecollected), "%m") #created a new column called month
tidydata$day <- format(as.Date(tidydata$datecollected), "%d") #created a new column called day
head(tidydata)
the year, month, day columns are characters Change to numeric
tidydata$year <- as.numeric(tidydata$year)
tidydata$month <- as.numeric(tidydata$month)
tidydata$day <- as.numeric(tidydata$day)
head(tidydata)
count NAs in the dataset. Mainly you are interested in the datecollected column (ignore individual count - you know there is a lot missing)
missingdata <- apply(tidydata, 2, function (x) sum(is.na(x)))
missingdata
id decimalLatitude decimalLongitude
0 0 0
datecollected institutioncode individualcount
0 0 16202
resname originalscientificname collectioncode
0 0 0
year month day
771 771 771
hmm interesting - you have 771 columns from year month and day with missing data, but datacollected has 0. There may be a formatting issue. To take a closer look, subset all rows where year == na
missingdatarows <- tidydata[is.na(tidydata$year), ]
head(missingdatarows)
so the NAs in the year column relate to no data (without a na value attached). Remove all rows where year == na
tidydata <- tidydata[!is.na(tidydata$year), ]
missingdata2 <- apply(tidydata, 2, function (x) sum(is.na(x)))
missingdata2
id decimalLatitude decimalLongitude
0 0 0
datecollected institutioncode individualcount
0 0 15445
resname originalscientificname collectioncode
0 0 0
year month day
0 0 0
The dplyr package’s distinct function removes any duplicate rows (based on all columns EXCEPT id as this is always going to be unique, and year month day as this is generated from datecollected)
library("dplyr")
Attaching package: 㤼㸱dplyr㤼㸲
The following objects are masked from 㤼㸱package:stats㤼㸲:
filter, lag
The following objects are masked from 㤼㸱package:base㤼㸲:
intersect, setdiff, setequal, union
tidydata <- distinct(tidydata, decimalLatitude, decimalLongitude, datecollected, institutioncode, institutioncode, individualcount, resname, originalscientificname, .keep_all = T) #the .keep_all means that id isn't excluded from the output
head(tidydata)
Lets take a look at the institutioncode to make sure the names make sense…
tidydata$institutioncode <- as.character(tidydata$institutioncode)
whosampled <- unique(tidydata$institutioncode) #makes a variable of institutions
whosampledordered <- sort(whosampled) #just puts the list in
whosampledordered
[1] ""
[2] "Bedford Institute of Oceanography (BIO)"
[3] "BIO"
[4] "DFO-Central and Arctic, Freshwater Institute (FWI)"
[5] "DFO-IML"
[6] "DFO-ISDM"
[7] "DFO-NFLD"
[8] "DFO-NG"
[9] "DFO-SF"
[10] "DFO-SG"
[11] "DFO Gulf Fisheries Centre"
[12] "ICES"
[13] "IML-MLI"
[14] "IML_MLI"
[15] "IMR"
[16] "Maine DMR"
[17] "NMFS-NEFSC"
[18] "NOAA, NMFS, Northeast Fisheries Science Center"
[19] "North Atlantic Fisheries Centre"
[20] "Northwest Atlantic Fisheries Organization (NAFO)"
[21] "ROM"
[22] "The Atlantic reference Centre"
[23] "The Atlantic Reference Centre (ARC)"
[24] "USNM"
Ok so a number of these names actually represent the same insitiution (e.g. DFO Regions. Some details here and the first is no institution (you might be able to figure it out though). The merges needed are as follows “Bedford Institute of Oceanography (BIO)” & “BIO” & “DFO-SF” == DFOMTMS “DFO-IML” & “IML-MLI” & “IML_MLI” & “DFO-NG” == DFOQC “DFO-NFLD” & “North Atlantic Fisheries Centre” == “DFONL” “NMFS-NEFSC” & “NOAA, NMFS, Northeast Fisheries Science Center” == NEFSC “The Atlantic reference Centre” & “The Atlantic Reference Centre (ARC)” == ARC “DFO-SG” & “DFO Gulf Fisheries Centre” == DFOGULF
and some you can just make shorter “Northwest Atlantic Fisheries Organization (NAFO)” == NAFO “DFO-ISDM” == DFOISDM “Maine DMR” == MaineDMR
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "Bedford Institute of Oceanography (BIO)", "DFOMTMS")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "BIO", "DFOMTMS")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-SF", "DFOMTMS")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-IML", "DFOQC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "IML-MLI", "DFOQC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-NG", "DFOQC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "IML_MLI", "DFOQC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-NFLD", "DFONL")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "North Atlantic Fisheries Centre", "DFONL")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "NMFS-NEFSC", "NEFSC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "NOAA, NMFS, Northeast Fisheries Science Center", "NEFSC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "The Atlantic Reference Centre (ARC)", "ARC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "The Atlantic reference Centre", "ARC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-SG", "DFOGulf")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO Gulf Fisheries Centre", "DFOGulf")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "Northwest Atlantic Fisheries Organization (NAFO)", "NAFO")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-Central and Arctic, Freshwater Institute (FWI)", "DFOCENARC")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "DFO-ISDM", "DFOISDM")
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "Maine DMR", "MaineDMR")
#recheck
tidydata$institutioncode <- as.character(tidydata$institutioncode)
whosampled <- unique(tidydata$institutioncode) #makes a variable of institutions
whosampledordered <- sort(whosampled) #just puts the list in
whosampledordered
[1] "" "ARC" "DFOCENARC" "DFOGulf" "DFOISDM" "DFOMTMS"
[7] "DFONL" "DFOQC" "ICES" "IMR" "MaineDMR" "NAFO"
[13] "NEFSC" "ROM" "USNM"
Now look at the missing institution. Use resname to see if it helps…
missinginst <- subset(tidydata, institutioncode == "")
missinginst
ok now check for unique resname…
missinginst$resname <- as.character(missinginst$resname)
missinginstunique <- unique(missinginst$resname) #makes a variable of institutions
missinginstuniquedordered <- sort(missinginstunique) #just puts the list in
missinginstuniquedordered <- as.character(missinginstuniquedordered)
missinginstuniquedordered
[1] "Arctic Species Trend Index (ASTI) : Marine"
Ok soo looking at the OBIS metadata, the institution is the Conservation of Arctic Flora and Fauna (CAFF)
tidydata$institutioncode <- replace(tidydata$institutioncode, tidydata$institutioncode == "", "CAFF")
whosampled <- unique(tidydata$institutioncode) #makes a variable of institutions
whosampledordered <- sort(whosampled) #just puts the list in
whosampledordered
[1] "ARC" "CAFF" "DFOCENARC" "DFOGulf" "DFOISDM" "DFOMTMS"
[7] "DFONL" "DFOQC" "ICES" "IMR" "MaineDMR" "NAFO"
[13] "NEFSC" "ROM" "USNM"
Now lets check the number of records and spatial-temporal distribution of the observations by institution code to make sure none are dodgy
first a table of how many observations each instituioncode has
library(plyr) #to use the count function
--------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
--------------------------------------------------------------------------
Attaching package: 㤼㸱plyr㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
arrange, count, desc, failwith, id, mutate, rename, summarise,
summarize
obs_by_ins <- count(tidydata, "institutioncode")
obs_by_ins
write.csv(obs_by_ins, file = "../output/bio/no_observations_institutioncode.csv")
ok so IMR, NAFO, ICES, & USM have very few entries (1, 1, 1, and 3 respectively)
Lets take a look at the spatial breakdown of these institutions.First all points…
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(38, 62), asp = 1, main = "All Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(tidydata$decimalLongitude, tidydata$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/all_occurrences.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
Note there is one point up by iceland that you should get rid of (Icelandic population thought to be seperate from Labrador, but it is unclear if this is true or not).
Map the institutioncode == ARC only data…
ARC_obs <- tidydata[tidydata$institutioncode == "ARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "Arc Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ARC_obs$decimalLongitude, ARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/ARC_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
CAFF_obs <- tidydata[tidydata$institutioncode == "CAFF", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "CAFF Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(CAFF_obs$decimalLongitude, CAFF_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/CAFF_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFOCENARC_obs <- tidydata[tidydata$institutioncode == "DFOCENARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO Central & Arctic Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOCENARC_obs$decimalLongitude, DFOCENARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFOCENARC_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFOGulf_obs <- tidydata[tidydata$institutioncode == "DFOGulf", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO Gulf Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOGulf_obs$decimalLongitude, DFOGulf_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFOGulf_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFOISDM_obs <- tidydata[tidydata$institutioncode == "DFOISDM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO ISDM Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOISDM_obs$decimalLongitude, DFOISDM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFOISDM_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFOMTMS_obs <- tidydata[tidydata$institutioncode == "DFOMTMS", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO Maritimes Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOMTMS_obs$decimalLongitude, DFOMTMS_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFOMTMS_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFONL_obs <- tidydata[tidydata$institutioncode == "DFONL", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO Newfouundland & Labrador Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFONL_obs$decimalLongitude, DFONL_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFONL_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
DFOQC_obs <- tidydata[tidydata$institutioncode == "DFOQC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "DFO Quebec Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOQC_obs$decimalLongitude, DFOQC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/DFOQC_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
ICES_obs <- tidydata[tidydata$institutioncode == "ICES", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "ICES Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ICES_obs$decimalLongitude, ICES_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/ICES_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
IMR_obs <- tidydata[tidydata$institutioncode == "IMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "IMR Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(IMR_obs$decimalLongitude, IMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/IMR_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
MaineDMR_obs <- tidydata[tidydata$institutioncode == "MaineDMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "Maine DMR Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(MaineDMR_obs$decimalLongitude, MaineDMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/MaineDMR_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
NAFO_obs <- tidydata[tidydata$institutioncode == "NAFO", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "NAFO Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NAFO_obs$decimalLongitude, NAFO_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/NAFO_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
NEFSC_obs <- tidydata[tidydata$institutioncode == "NEFSC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "NEFSC Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NEFSC_obs$decimalLongitude, NEFSC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/NEFSC_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
ROM_obs <- tidydata[tidydata$institutioncode == "ROM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "Royal Ontario Museum Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/ROM_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
USNM_obs <- tidydata[tidydata$institutioncode == "USNM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "National Museum of Natural History Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(USNM_obs$decimalLongitude, USNM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/USNM_occurrences_all.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
Looking at the maps, the ROM, NAFO, IMR, ICES have data next to iceland. Since NAFO, IMR, and ICES only have one data point each, you can remove these observations easily from the record…
tidydata <- tidydata[!tidydata$institutioncode == "NAFO", ]
tidydata <- tidydata[!tidydata$institutioncode == "IMR", ]
tidydata <- tidydata[!tidydata$institutioncode == "ICES", ]
Now for ROM there are more data points… Use indentify function which lets you click on a point on the plot
x11() #this opens the plot i a window you can interact with
plot(map2, xlim = c(-71, -43), ylim =c(39, 62), asp = 1, main = "Royal Ontario Museum Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude"))
identify(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, labels=ROM_obs$decimalLongitude) #decimalLongitude chosen as plot identifier as the values are unique
[1] 2
The point that needs to be removed is located at -44.96667. Note this is not the full value so use some other locators to remove the row (you could just used id, but pick multiple others just to learn)
tidydata <- tidydata[!(tidydata$resname == "Fish specimens" & tidydata$datecollected == "1965-08-13 12:00:00+01"), ]
now replot all data just to check…
plot(map2, xlim = c(-71, -43), ylim =c(38, 62), asp = 1, main = "All Occurrences without Iceland", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(tidydata$decimalLongitude, tidydata$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/all_occurrences_less_iceland.png") #this prints a png of the plot
png
3
dev.off() #this turns off the print command
png
2
great! write what you have so far to a csv
write.csv(tidydata, file = "../data/bio/capelin_obis_tidy.csv", row.names = FALSE)
The BIOMER dataset starts in 1998-01 (GLORYS 1993), so cut all occurrences prior to 1998-01
tidydata <- tidydata[tidydata$year >= 1998, ]
and write to a csv…
write.csv(tidydata, file = "../data/bio/capelin_obis_tidy.csv", row.names = FALSE)
There are some more columns you want to add… - Occurrence - presence/background (1/0) - NAFO region - Gear type - Unique Cell ID (based on the environmental data)
so not to mess with tidydata, create a new variable and write a new .csv
adddata <- tidydata
write.csv(adddata, file = "../data/bio/capelin_additional_tidy.csv", row.names = FALSE)
first create a new column for Occurence, and populate with “1” (because these are all presences)
adddata$occurrence <- "1"
head(adddata)
NAFO Zones shapefile obtained from NAFO
library(rgdal)
rgdal: version: 1.3-4, (SVN revision 766)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
Path to GDAL shared files: C:/Users/bunny/Documents/GitHub/Chapter_1/packrat/lib/x86_64-w64-mingw32/3.5.1/rgdal/gdal
GDAL binary built with GEOS: TRUE
Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
Path to PROJ.4 shared files: C:/Users/bunny/Documents/GitHub/Chapter_1/packrat/lib/x86_64-w64-mingw32/3.5.1/rgdal/proj
Linking to sp version: 1.3-1
Great. Lets see if any are missing a NAFO Zone. But first, write to a csv and reload as the data is now some wierd ‘spatial points dataframe’…
write.csv(adddata, "../data/bio/capelin_additional_tidy.csv", row.names=FALSE)
adddata <- read.csv("../data/bio/capelin_additional_tidy.csv")
nafo_na <- subset(adddata, is.na(adddata$nafo_zone))
nafo_na
Uh oh there is a lot. A bet a bunch of them are in the Hudson Strait (which is outside the NAFO Zone).Map the nafo_na points…
plot(map2, xlim = c(-71, -43), ylim =c(38, 62), asp = 1, main = "Occurences missing a NAFO Zone", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(nafo_na$decimalLongitude, nafo_na$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
ok yes - Hudson Strait has many, but there are some near the coast that are probably just missed out due to minor differences in the point location/shapefile boundaries. Start with the Hudson Strait ones as this is easy - anything above 56N, nafo_zone == “HudsonStrait”
adddata$nafo_zone <- as.character(adddata$nafo_zone)
adddata[is.na(adddata)] <- "xx"
adddata$nafo_zone[adddata$nafo_zone == "xx" & adddata$decimalLatitude > 56] <- "HudsonStrait"
and map to check
nafo_na <- subset(adddata, nafo_zone == "xx")
plot(map2, xlim = c(-71, -43), ylim =c(38, 62), asp = 1, main = "Occurences missing a NAFO Zone", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(nafo_na$decimalLongitude, nafo_na$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
so now i need to account for the others (either as occuring on land or just in an area that is missing a NAFO Zone tag) which is more tricky… i might just do this bit in ArcGIS quickly.
write.csv(adddata, file = "../data/bio/capelin_additional_tidy.csv", row.names = FALSE)
So in ArcGIS I saw that these points fall on land. Some are extremely close to the coast, some further inland. There are only 132 of thesepoints, so I will just delete them from the database.
adddata$nafo_zone[adddata$nafo_zone == "xx"] <- NA #make all xx values NA
adddata <- adddata[!is.na(adddata$nafo_zone), ]
write.csv(adddata, file = "../data/bio/capelin_additional_tidy.csv", row.names = FALSE)
To get this information, I need to look at the collectioncode details and the OBIS metadata for each resname and then add.
adddata$gear <- "x" #create a new column called gear and populate with x
#now give a list of the resnames...
whichres <- unique(adddata$resname) #makes a variable of institutions
whichresordered <- sort(whichres) #just puts the list in
whichresordered
[1] Arctic Species Trend Index (ASTI) : Marine
[2] Atlantic Reference Centre Museum of Canadian Atlantic Organisms - Invertebrates and Fishes Data
[3] Atlantic Zone Monitoring Program Maritimes Region (AZMP) plankton datasets. In: Fisheries and Oceans Canada - BioChem archive
[4] BioChem zooplankton samples from the Gully, 2006-2007
[5] BioChem: DFO Atlantic Zone Monitoring Program plankton datasets - Newfoundland and Labrador Region (OBIS Canada)
[6] BioChem: Sameoto zooplankton collection
[7] DFO Central and Arctic Multi-species Stock Assessment Surveys
[8] DFO Gulf Region Groundfish Research Vessel Surveys
[9] DFO Maritimes Research Vessel Trawl Surveys Fish Observations
[10] DFO Newfoundland and Labrador Region Ecosystem Trawl Surveys
[11] DFO Quebec Region Multispecies bottom trawl surveys
[12] Fish specimens
[13] Ichthyology Collection - Royal Ontario Museum
[14] Maine Department of Marine Resources Inshore Trawl Survey 2000–2009
[15] Northeast Fisheries Science Center Bottom Trawl Survey Data
[16] Northern Gulf of St. Lawrence Fishes
16 Levels: Arctic Species Trend Index (ASTI) : Marine ...
Arctic Species Trend Index (ASTI) : Marine. No gear details available
asti <- subset(adddata, resname == "Arctic Species Trend Index (ASTI) : Marine")
asti$collectioncode <- as.character(asti$collectioncode)
asti_col <- unique(asti$collectioncode) #makes a variable of institutions
asti_colordered <- sort(asti_col) #just puts the list in
asti_colordered
[1] ""
there is no information in the collection code. The Metadata on OBIS also gives no details. gear = NA
adddata$gear[adddata$resname == "Arctic Species Trend Index (ASTI) : Marine"] <- "NA"
Atlantic Reference Centre Museum of Canadian Atlantic Organisms - Invertebrates and Fishes Data.
arcm <- subset(adddata, resname == "Atlantic Reference Centre Museum of Canadian Atlantic Organisms - Invertebrates and Fishes Data")
arcm$collectioncode <- as.character(arcm$collectioncode)
arcm_col <- unique(arcm$collectioncode) #makes a variable of institutions
arcm_colordered <- sort(arcm_col) #just puts the list in
arcm_colordered
[1] "ARC"
No gear details in collectioncode. The Metadata on OBIS also gives no details. gear = NA
adddata$gear[adddata$resname == "Atlantic Reference Centre Museum of Canadian Atlantic Organisms - Invertebrates and Fishes Data"] <- "NA"
azmpmar <- subset(adddata, resname == "Atlantic Zone Monitoring Program Maritimes Region (AZMP) plankton datasets. In: Fisheries and Oceans Canada - BioChem archive")
azmpmar$collectioncode <- as.character(azmpmar$collectioncode)
azmpmar_col <- unique(azmpmar$collectioncode) #makes a variable of institutions
azmpmar_colordered <- sort(azmpmar_col) #just puts the list in
azmpmar_colordered
[1] "AZMP-MAR-2003" "AZMP-MARgroundfish-2000" "AZMP-MARgroundfish-2001" "AZMP-MARgroundfish-2003" "AZMP-MARgroundfish-2004"
[6] "AZMP-MARgroundfish-2006" "AZMP-MARgroundfish-2008"
No gear details in collectioncode. The metadata on OBIS indicates vertical plankton tow
adddata$gear[adddata$resname == "Atlantic Zone Monitoring Program Maritimes Region (AZMP) plankton datasets. In: Fisheries and Oceans Canada - BioChem archive"] <- "vertical_plankton_tow"
BioChem zooplankton samples from the Gully, 2006-2007.
biogul <- subset(adddata, resname == "BioChem zooplankton samples from the Gully, 2006-2007")
biogul$collectioncode <- as.character(biogul$collectioncode)
biogul_col <- unique(biogul$collectioncode) #makes a variable of institutions
biogul_colordered <- sort(biogul_col) #just puts the list in
biogul_colordered
[1] "Gully Plankton Ringnet Samples 2006-2007"
Along with the OBIS metadata, indicates gear = vertical plankton tow
adddata$gear[adddata$resname == "BioChem zooplankton samples from the Gully, 2006-2007"] <- "vertical_plankton_tow"
azmpnl <- subset(adddata, resname == "BioChem: DFO Atlantic Zone Monitoring Program plankton datasets - Newfoundland and Labrador Region (OBIS Canada")
azmpnl$collectioncode <- as.character(azmpnl$collectioncode)
azmpnl_col <- unique(azmpnl$collectioncode) #makes a variable of institutions
azmpnl_colordered <- sort(azmpnl_col) #just puts the list in
azmpnl_colordered
character(0)
No details in collectioncode. Obis metadata indicates gear = vertical plankton tow
adddata$gear[adddata$resname == "BioChem: DFO Atlantic Zone Monitoring Program plankton datasets - Newfoundland and Labrador Region (OBIS Canada)"] <- "vertical_plankton_tow"
BioChem: Sameoto zooplankton collection.Vertical plankton tow
biosam <- subset(adddata, resname == "BioChem: Sameoto zooplankton collection")
biosam$collectioncode <- as.character(biosam$collectioncode)
biosam_col <- unique(biosam$collectioncode) #makes a variable of institutions
biosam_colordered <- sort(biosam_col) #just puts the list in
biosam_colordered
[1] "BIO-Sameoto BIONESS samples"
Insufficient details in collectioncode. Obis metadata indicates gear = vertical plankton tow
adddata$gear[adddata$resname == "BioChem: Sameoto zooplankton collection"] <- "vertical_plankton_tow"
DFO Central and Arctic Multi-species Stock Assessment Surveys. A
dfocas <- subset(adddata, resname == "DFO Central and Arctic Multi-species Stock Assessment Surveys")
dfocas$collectioncode <- as.character(dfocas$collectioncode)
dfocas_col <- unique(dfocas$collectioncode) #makes a variable of institutions
dfocas_colordered <- sort(dfocas_col) #just puts the list in
dfocas_colordered
[1] "DFOSurvey_Modified Alfredo-03" "DFOSurvey_Modified Cosmos 2600"
[3] "DFOSurvey_Modified Modified (21\") Campelen" "DFOSurvey_Modified Standard (14\") Campelen"
ok these are all types of bottom trawl.
adddata$gear[adddata$resname == "DFO Central and Arctic Multi-species Stock Assessment Surveys" & adddata$collectioncode == "DFOSurvey_Modified Alfredo-03"] <- "bottom_trawl_alfredo_03"
adddata$gear[adddata$resname == "DFO Central and Arctic Multi-species Stock Assessment Surveys" & adddata$collectioncode == "DFOSurvey_Modified Cosmos 2600"] <- "bottom_trawl_cosmos_2600"
adddata$gear[adddata$resname == "DFO Central and Arctic Multi-species Stock Assessment Surveys" & adddata$collectioncode == "DFOSurvey_Modified Modified (21\") Campelen"] <- "bottom_trawl_campelen_21"
adddata$gear[adddata$resname == "DFO Central and Arctic Multi-species Stock Assessment Surveys" & adddata$collectioncode == "DFOSurvey_Modified Standard (14\") Campelen"] <- "bottom_trawl_campelen_14"
DFO Gulf Region Groundfish Research Vessel Surveys.
dfogulfas <- subset(adddata, resname == "DFO Gulf Region Groundfish Research Vessel Surveys")
dfogulfas$collectioncode <- as.character(dfogulfas$collectioncode)
dfogulfas_col <- unique(dfogulfas$collectioncode) #makes a variable of institutions
dfogulfas_colordered <- sort(dfogulfas_col) #just puts the list in
dfogulfas_colordered
[1] "ECOSYSTEM"
no gear details in collection code. OBIS metadata indicates Three survey vessels and two types of bottom-trawls were used prior to 2003: the E. E. Prince from 1971-1985 using a Yankee 36 trawl, the Lady Hammond from 1985-1991 and the Alfred Needler from 1992-2002, both using a Western IIA trawl. Due to a fire on the Needler, the 2003 survey was conducted by its sister-ship, the Templeman using the Western IIA trawl. This survey started late and was incomplete. The survey was conducted by both the Needler and the Teleost in 2004 and 2005 and by the Teleost since then, using the Western IIA trawl. Note you have not captured ship information above so leave it out here. Your data is 1998 onwards to the gear for all = bottom_trawl_western_IIA
adddata$gear[adddata$resname == "DFO Gulf Region Groundfish Research Vessel Surveys"] <- "bottom_trawl_western_IIA"
DFO Maritimes Research Vessel Trawl Surveys Fish Observations.
dfomas <- subset(adddata, resname == "DFO Maritimes Research Vessel Trawl Surveys Fish Observations")
dfomas$collectioncode <- as.character(dfomas$collectioncode)
dfomas_col <- unique(dfomas$collectioncode) #makes a variable of institutions
dfomas_colordered <- sort(dfomas_col) #just puts the list in
dfomas_colordered
[1] "4VWCOD" "4VWCOD_TELEOST" "COMPARE" "GEORGES" "GEORGES_TELEOST" "SUMMER" "SUMMER_TELEOST"
insuffient information. OBIS metadata just indicates “a variety of trawl gear”. gear = “bottom_trawl”
adddata$gear[adddata$resname == "DFO Maritimes Research Vessel Trawl Surveys Fish Observations"] <- "bottom_trawl"
DFO Newfoundland and Labrador Region Ecosystem Trawl Surveys
dfonas <- subset(adddata, resname == "DFO Newfoundland and Labrador Region Ecosystem Trawl Surveys")
dfonas$collectioncode <- as.character(dfonas$collectioncode)
dfonas_col <- unique(dfonas$collectioncode) #makes a variable of institutions
dfonas_colordered <- sort(dfonas_col) #just puts the list in
dfonas_colordered
[1] "BT_standard_sets"
More useful, OBIS metadata indicates “Starting in the fall of 1995 surveys have been conducted using a Campelen 1800 trawl fitted with rock-hopper foot gear.” gear = bottom_trawl_campelen_1800
adddata$gear[adddata$resname == "DFO Newfoundland and Labrador Region Ecosystem Trawl Surveys"] <- "bottom_trawl_campelen_1800"
DFO Quebec Region Multispecies bottom trawl surveys
dfoqas <- subset(adddata, resname == "DFO Quebec Region Multispecies bottom trawl surveys")
dfoqas$collectioncode <- as.character(dfoqas$collectioncode)
dfoqas_col <- unique(dfoqas$collectioncode) #makes a variable of institutions
dfoqas_colordered <- sort(dfoqas_col) #just puts the list in
dfoqas_colordered
[1] "RELEVES_RECHERCHE"
no gear details in collectioncode. OBIS metadata indicates Campelen 1800. gear = bottom_trawl_campelen_1800
adddata$gear[adddata$resname == "DFO Quebec Region Multispecies bottom trawl surveys"] <- "bottom_trawl_campelen_1800"
fishsp <- subset(adddata, resname == "Fish specimens")
fishsp$collectioncode <- as.character(fishsp$collectioncode)
fishsp_col <- unique(fishsp$collectioncode) #makes a variable of institutions
fishsp_colordered <- sort(fishsp_col) #just puts the list in
fishsp_colordered
[1] "Ichthyology"
no gear detail in collectioncode. OBIS metadata indicates these are museum records. gear = NA
adddata$gear[adddata$resname == "Fish specimens"] <- "NA"
Ichthyology Collection - Royal Ontario Museum
iccol <- subset(adddata, resname == "Ichthyology Collection - Royal Ontario Museum")
iccol$collectioncode <- as.character(iccol$collectioncode)
iccol_col <- unique(iccol$collectioncode) #makes a variable of institutions
iccol_colordered <- sort(iccol_col) #just puts the list in
iccol_colordered
[1] "Fishes"
no gear details in collectioncode. OBIS metadata indicates museum records. gear = NA
adddata$gear[adddata$resname == "Ichthyology Collection - Royal Ontario Museum"] <- "NA"
Maine Department of Marine Resources Inshore Trawl Survey 2000–2009
mdmr <- subset(adddata, resname == "Maine Department of Marine Resources Inshore Trawl Survey 2000–2009")
mdmr$collectioncode <- as.character(mdmr$collectioncode)
mdmr_col <- unique(mdmr$collectioncode) #makes a variable of institutions
mdmr_colordered <- sort(mdmr_col) #just puts the list in
mdmr_colordered
[1] "ME - NH Inshore Groundfish Trawl Survey"
Insufficient infomation in collectioncode. OBIS metadata indicates “a modified version of the shrimp net used in Maine waters. It was designed by the vessel owner and net designer Jeff Flagg to fish for a variety of near-bottom dwelling species without targeting any specific component” gear-type = “bottom_trawl”
adddata$gear[adddata$resname == "Maine Department of Marine Resources Inshore Trawl Survey 2000–2009"] <- "bottom_trawl"
Northeast Fisheries Science Center Bottom Trawl Survey Data
nefsu <- subset(adddata, resname == "Northeast Fisheries Science Center Bottom Trawl Survey Data")
nefsu$collectioncode <- as.character(nefsu$collectioncode)
nefsu_col <- unique(nefsu$collectioncode) #makes a variable of institutions
nefsu_colordered <- sort(nefsu_col) #just puts the list in
nefsu_colordered
[1] "SPRING NMFS NEFSC BOTTOM TRAWL SURVEY"
insufficient detail in collection code. OBIS metadata indicates “bottom trawl gear”. gear = “bottom_trawl”
adddata$gear[adddata$resname == "Northeast Fisheries Science Center Bottom Trawl Survey Data"] <- "bottom_trawl"
Northern Gulf of St. Lawrence Fishes
ngsl <- subset(adddata, resname == "Northern Gulf of St. Lawrence Fishes")
ngsl$collectioncode <- as.character(ngsl$collectioncode)
ngsl_col <- unique(ngsl$collectioncode) #makes a variable of institutions
ngsl_colordered <- sort(ngsl_col) #just puts the list in
ngsl_colordered
[1] "Poisson"
no gear details in collectioncode. OBIS metadata indicates “Sampling methods used ranged from fish traps and seine nets on the coasts to offshore trawling”. gear = NA
adddata$gear[adddata$resname == "Northern Gulf of St. Lawrence Fishes"] <- "NA"
ok check to see if all of gear is done (ie if there are any x left)
gearcheck <- subset(adddata, gear == "x")
gearcheck
yay - all done.
year
with(adddata, table(year, institutioncode))
institutioncode
year ARC CAFF DFOCENARC DFOGulf DFOISDM DFOMTMS DFONL DFOQC MaineDMR NEFSC ROM
1998 1 1 0 48 1 21 585 70 0 1 0
1999 0 1 0 90 1 74 507 0 0 0 0
2000 18 1 0 99 15 58 536 0 0 0 0
2001 0 1 0 52 1 44 549 0 0 0 0
2002 7 1 0 104 1 52 496 0 0 0 0
2003 0 1 0 43 3 42 579 0 0 0 0
2004 0 1 0 94 2 24 671 177 0 0 0
2005 1 1 14 109 0 83 473 219 7 0 0
2006 4 1 11 78 4 80 482 236 6 0 0
2007 2 1 42 87 1 37 467 234 0 0 2
2008 0 1 10 82 5 38 478 265 1 0 0
2009 0 1 14 78 0 30 512 133 3 0 0
2010 1 1 10 95 0 44 535 111 0 0 0
2011 0 1 22 77 0 14 479 141 0 0 0
2012 0 1 8 86 0 10 0 152 0 0 0
2013 2 0 18 69 0 21 0 129 0 0 0
2014 1 0 22 101 0 6 0 0 0 0 0
2015 3 0 12 0 0 0 0 0 0 0 0
month
inst_by_mt <- with(adddata, table(month, institutioncode))
write.csv(inst_by_mt, file = "../output/bio/institutioncode_by_mth.csv")
A unique cell ID layer has been created in the environmental_data_preperation.Rmd. Now you want to extract the value of that layer to the .csv you are creating…
Load the env. data
and just grab the attributes of the cell_layer
cell_layer
class : RasterLayer
dimensions : 112, 102, 11424 (nrow, ncol, ncell)
resolution : 25000, 25000 (x, y)
extent : -1012223, 1537777, -228779.7, 2571220 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
data source : C:\Users\bunny\Documents\GitHub\Chapter_1\output\env\glo_unique_cell.tif
names : glo_unique_cell
values : 1, 6729 (min, max)
The occurrence data needs to be converted into spatialpoints dataframe
library(sp)
xy <- adddata[ ,c(3,2)] #3 = decimalLongitude, 2 = decimalLatitude
adddata_aea <- SpatialPointsDataFrame(coords = xy, data = adddata, proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
adddata_aea
class : SpatialPointsDataFrame
features : 11578
extent : -70.573, -45.53, 41.53367, 68.587 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
variables : 16
names : id, decimalLatitude, decimalLongitude, datecollected, institutioncode, individualcount, resname, originalscientificname, collectioncode, year, month, day, occurrence, nafo_zone, optional, ...
min values : 12115310, 41.53367, -45.53000, 1998-04-01 12:00:00+02, ARC, 0, Arctic Species Trend Index (ASTI) : Marine, Mallotus villosus, , 1998, 1, 1, 1, 0A, TRUE, ...
max values : 386782015, 68.58700, -70.57300, 2015-11-27 12:00:00+01, ROM, xx, Northern Gulf of St. Lawrence Fishes, Mallotus villosus, SUMMER_TELEOST, 2015, 12, 31, 1, HudsonStrait, TRUE, ...
adddata_aea <- spTransform(adddata_aea, CRS = "+proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs") #note -60 used to be -91. Changed to 'straighten up'
adddata_aea
class : SpatialPointsDataFrame
features : 11578
extent : -858219.4, 1068577, 197997.9, 3188919 (xmin, xmax, ymin, ymax)
coord. ref. : +proj=aea +lat_1=50 +lat_2=70 +lat_0=40 +lon_0=-60 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs +towgs84=0,0,0
variables : 16
names : id, decimalLatitude, decimalLongitude, datecollected, institutioncode, individualcount, resname, originalscientificname, collectioncode, year, month, day, occurrence, nafo_zone, optional, ...
min values : 12115310, 41.53367, -45.53000, 1998-04-01 12:00:00+02, ARC, 0, Arctic Species Trend Index (ASTI) : Marine, Mallotus villosus, , 1998, 1, 1, 1, 0A, TRUE, ...
max values : 386782015, 68.58700, -70.57300, 2015-11-27 12:00:00+01, ROM, xx, Northern Gulf of St. Lawrence Fishes, Mallotus villosus, SUMMER_TELEOST, 2015, 12, 31, 1, HudsonStrait, TRUE, ...
and plot and Write the transformed species data to a new csv (this will also have the coordinates in meters - note meters are under decimalLongitude1 and decimalLatitude1)…
plot(adddata_aea, axes=TRUE, cex.axis=.95)
write.csv(adddata_aea, file = "../output/bio/data_aea.csv", row.names = FALSE)
Ok now plot the env. and point data…
plot(cell_layer)
points(adddata_aea$decimalLongitude, adddata_aea$decimalLatitude)
ok now the next step is to extract the unique cell value and add to each of the points then write a .csv then convert back to a normal dataframe
adddata_aea$cell_id <- extract(x=cell_layer, y = adddata_aea)
write.csv(adddata_aea, file = "../output/bio/data_aea_cell.csv", row.names = FALSE)
data_aea <- as.data.frame(adddata_aea)
head(data_aea)
looking at the header data, there seems to be some new columns created on the way…
colnames(data_aea)
[1] "id" "decimalLatitude" "decimalLongitude" "datecollected" "institutioncode"
[6] "individualcount" "resname" "originalscientificname" "collectioncode" "year"
[11] "month" "day" "occurrence" "nafo_zone" "optional"
[16] "gear" "cell_id" "decimalLongitude.1" "decimalLatitude.1"
I don’t want “optional” - get rid of it!
data_aea <- subset(data_aea, select = -c(optional))
colnames(data_aea)
[1] "id" "decimalLatitude" "decimalLongitude" "datecollected" "institutioncode"
[6] "individualcount" "resname" "originalscientificname" "collectioncode" "year"
[11] "month" "day" "occurrence" "nafo_zone" "gear"
[16] "cell_id" "decimalLongitude.1" "decimalLatitude.1"
write.csv(adddata_aea, file = "../output/bio/data_aea_cell.csv", row.names = FALSE)
The data has been prepared as per the environmental_data_preperation.Rmd
amo <- read.csv("../output/env/amo.csv", header=TRUE)
head(amo)
So you want.. - AMO value at sampling month/year - AMO value at previous sampling month/year - AMO phase at previous winter (because the winter values are thought to be a major driver of ocean conditions in the following spring, summer, and autumn)
change month col names to numeric
names(amo)[2:13] = c("1", "2","3", "4", "5", "6", "7", "8", "9", "10", "11", "12")
head(amo)
Start with the month data first…subset cols 1:13 (where the year and month data are held)
amo_yrmt <- amo[ ,1:13]
amo_yrmt
amo_yrmt <- reshape(amo_yrmt,
varying = c("1", "2","3", "4", "5", "6", "7", "8", "9", "10", "11", "12"),
v.names = "value",
direction = "long")
amo_yrmt <- amo_yrmt[order(amo_yrmt$year),]
amo_yrmt
rename time to month.. and value to amo_sample
colnames(amo_yrmt)[2] <- "month"
colnames(amo_yrmt)[3] <- "amo_sample"
amo_yrmt
now match the year and the month in the two dataframes, and populate the species dataset (data_aea) with amo_sample
data_aea <- merge(x = data_aea, y = amo_yrmt[ , c("year", "month", "amo_sample")], by = c("year", "month"), all.x = TRUE)
data_aea
ok next - AMO value at previous sampling month/year
shift the amo_sample column down by 1…
library(useful)
Loading required package: ggplot2
amo_yrmt <- shift.column(data = amo_yrmt, columns = "amo_sample", up = FALSE)
amo_yrmt
Note the shifted values are in a new column called amo_sample.Shifted. rename to amo_prev
colnames(amo_yrmt)[colnames(amo_yrmt)=="amo_sample.Shifted"] <- "amo_prev"
amo_yrmt
write.csv(amo_yrmt, "../output/env/amo_prev.csv", row.names = FALSE) #for some reason i need to write to a csv and reload for the next step to work properly
now grab the data…
amo_prev <- read.csv("../output/env/amo_prev.csv")
data_aea <- merge(x = data_aea, y = amo_prev[ , c("year", "month", "amo_prev")], by = c("year", "month"), all.x = TRUE)
data_aea
and now - AMO phase at previous winter (because the winter values are thought to be a major driver of ocean conditions in the following spring, summer, and autumn)
same process as before - shift row then match
But first!
You need to extract the seasonal values from the amo dataset
amo_winter <- subset(amo, select = c("year", "WinterAvg"))
amo_winter
now shift the winter average data
amo_winter <- shift.column(data = amo_winter, columns = "WinterAvg", up = FALSE)
amo_winter
write.csv(amo_winter, "../output/env/amo_winter.csv", row.names = FALSE) #for some reason need to save to csv then reload in the next step...
remember your shifted column is called WinterAvg.Shifted
now extract the WinterAvg.Shifted values to the species dataset
amo_winter <- read.csv("../output/env/amo_winter.csv")
data_aea <- merge(x = data_aea, y = amo_winter[ , c("year", "WinterAvg.Shifted")], by = c("year"), all.x = TRUE)
colnames(data_aea)[colnames(data_aea)=="WinterAvg.Shifted"] <- "amo_winter" #change the colname
data_aea
ok and write a csv
write.csv(data_aea,"../output/bio/data_aea_cell_amo.csv", row.names = FALSE)
The data has been prepared as per the environmental_data_preperation.Rmd
nao <- read.csv("../output/env/nao.csv", header=TRUE)
head(nao)
So you want.. - AMO value at sampling month/year - AMO value at previous sampling month/year - AMO phase at previous winter (because the winter values are thought to be a major driver of ocean conditions in the following spring, summer, and autumn)
change month col names to just numeric
names(nao)[2:13] = c("1", "2","3", "4", "5", "6", "7", "8", "9", "10", "11", "12")
head(nao)
Start with the month data first…subset cols 1:13 (where the year and month data are held)
nao_yrmt <- nao[ ,1:13]
nao_yrmt
nao_yrmt <- reshape(nao_yrmt,
varying = c("1", "2","3", "4", "5", "6", "7", "8", "9", "10", "11", "12"),
v.names = "value",
direction = "long")
nao_yrmt <- nao_yrmt[order(amo_yrmt$year),]
nao_yrmt
rename time to month.. and value to nao_sample
colnames(nao_yrmt)[2] <- "month"
colnames(nao_yrmt)[3] <- "nao_sample"
nao_yrmt
now match the year and the month in the two dataframes, and populate the species dataset (data_aea) with nao_sample
data_aea <- merge(x = data_aea, y = nao_yrmt[ , c("year", "month", "nao_sample")], by = c("year", "month"), all.x = TRUE)
data_aea
ok next - NAO value at previous sampling month/year
shift the nao_sample column down by 1…
library(useful)
nao_yrmt <- shift.column(data = nao_yrmt, columns = "nao_sample", up = FALSE)
nao_yrmt
Note the shifted values are in a new column called amo_sample.Shifted. rename to amo_prev
colnames(nao_yrmt)[colnames(nao_yrmt)=="nao_sample.Shifted"] <- "nao_prev"
nao_yrmt
write.csv(nao_yrmt, "../output/env/nao_prev.csv", row.names = FALSE) #for some reason i need to write to a csv and reload for the next step to work properly
now grab the data…
nao_prev <- read.csv("../output/env/nao_prev.csv")
data_aea <- merge(x = data_aea, y = nao_prev[ , c("year", "month", "nao_prev")], by = c("year", "month"), all.x = TRUE)
data_aea
and now - NAO phase at previous winter (because the winter values are thought to be a major driver of ocean conditions in the following spring, summer, and autumn)
same process as before - shift row then match
But first!
You need to extract the seasonal values from the NAO dataset
nao_winter <- subset(nao, select = c("year", "WinterAvg"))
nao_winter
now shift the winter average data
nao_winter <- shift.column(data = nao_winter, columns = "WinterAvg", up = FALSE)
nao_winter
write.csv(nao_winter, "../output/env/nao_winter.csv", row.names = FALSE) #for some reason need to save to csv then reload in the next step...
remember your shifted column is called WinterAvg.Shifted
now extract the WinterAvg.Shifted values to the species dataset
nao_winter <- read.csv("../output/env/nao_winter.csv")
data_aea <- merge(x = data_aea, y = nao_winter[ , c("year", "WinterAvg.Shifted")], by = c("year"), all.x = TRUE)
colnames(data_aea)[colnames(data_aea)=="WinterAvg.Shifted"] <- "nao_winter" #change the colname
data_aea
ok and write a csv
write.csv(data_aea,"../output/bio/data_aea_cell_amo_nao.csv", row.names = FALSE)